
*************************************************************************
********** THIS .DO FILE FINALIZES THE CREATION OF THE DATASET **********
*************************************************************************

clear *

use "${root}/data/processed/main_database.dta", clear

* Create useful local macros to be used in the rest of the code
local time "_t_minus3 _t_minus2 _t_minus1 _t_0 _t_plus1 _t_plus2 _t_plus3 _t_plus4"
local vars "tot_exp tax_rev state_transf fed_transf current_exp capital_exp personnel investments tot_rev social_exp health education welfare nonsocial_exp housing transport other"
local measure "_cp  _pc _sgdp _share"

/* for years < 2002, it is not possible to separate welfare and pensions. We then define vars using welfare+pension for years < 2002 
* Note: this will not affect any result, except partly the estimation of the fourth lag in the placebo tests looking at lagged 'effects' */
foreach varsocial in social_exp welfare nonsocial_exp other  {
	foreach s of local measure {
			foreach z in "_t_minus3" "_t_minus2" "_t_minus1" "_t_0" "_t_plus1"{
				replace `varsocial'`s'`z' = `varsocial'_p`s'`z' if (year == 2000) | (year == 2004 & "`z'" == "_t_minus3")
			}
		}
	}

* organize as a panel with a city as one unit of observation and a mayoral term as one time-period
destring mun_code, replace
xtset mun_code year


********************************
** Create lame-duck indicator **
********************************
qui gen incumbent_mayor=""
qui replace incumbent_mayor = rank_1_nome_candidato[_n-1] if (mun_code==mun_code[_n-1] & year-year[_n-1]==4)
qui gen lame_duck = .
qui replace lame_duck = 0 if (rank_1_nome_candidato != incumbent_mayor & rank_1_nome_candidato!="" & incumbent_mayor!="")
qui replace lame_duck = 1 if (rank_1_nome_candidato == incumbent_mayor & rank_1_nome_candidato!="" & incumbent_mayor!="")
tab lame_duck, missing


*************************************************************************
** Create incumbent party (to be used in incumbent manipulation tests) **
*************************************************************************
qui gen incumbent_party=""
qui replace incumbent_party = rank_1_sigla_partido[_n-1] if (mun_code==mun_code[_n-1] & year-year[_n-1]==4)


*************************
** Some house-cleaning **
*************************

* Drop 2000 election
drop if year == 2000

* Create year dummies
tab year, gen(yy)

* Create average municipal population (mun_pop) during the term
qui egen mun_pop_avg = rowmean(mun_pop_t_plus1 mun_pop_t_plus2 mun_pop_t_plus3 mun_pop_t_plus4)


*******************************************************************
** Create baseline sample indicator and prepare fiscal variables **
*******************************************************************

* Create baseline sample indicator (two possible definitions)
gen baseline_sample = 1
replace baseline_sample = . if irregular == 1

gen placebo_lagged_sample = 1
replace placebo_lagged_sample = . if irregular == 1

* Compute term-averages of fiscal variables, taking logs multiplied by 100 for per capita & constant prices values (so results can be interpreted as % differences)
foreach v of local vars {
	foreach s of local measure {

		* Create average fiscal variables over the term (levels)
		qui gen `v'_avg`s' = (`v'`s'_t_plus1 + `v'`s'_t_plus2 + `v'`s'_t_plus3 + `v'`s'_t_plus4)/4 /* average over the whole term */
		qui gen `v'_3y_avg`s' = (`v'`s'_t_plus2 + `v'`s'_t_plus3 + `v'`s'_t_plus4)/3 /* average excluding first year */
				
		* Update baseline sample indicator
		qui replace baseline_sample = 0  if (`v'_avg`s' == . | margin_mayor_left == .) & irregular == 0 /* jde manuscript uses this one */
		qui replace placebo_lagged_sample = 0  if (`v'`s'_t_minus3 == . | `v'`s'_t_minus2 == . | `v'`s'_t_minus1 == . |`v'`s'_t_0 == . |`v'_avg`s' == . | margin_mayor_left == .) & irregular == 0
		
		
		* Create average fiscal variables over the previous term (for placebo test) (levels)
		qui gen `v'_lag_avg`s' = (`v'`s'_t_minus3 + `v'`s'_t_minus2 + `v'`s'_t_minus1 + `v'`s'_t_0)/4
				
		* Duplicate yearly variables- then to be transformed in logs for p.c. and c.p. variables
		qui gen `v'`s'_1st=`v'`s'_t_plus1
		qui gen `v'`s'_2nd=`v'`s'_t_plus2
		qui gen `v'`s'_3rd=`v'`s'_t_plus3
		qui gen `v'`s'_4th=`v'`s'_t_plus4
		qui gen `v'`s'_0=`v'`s'_t_0
		qui gen `v'`s'_1minus=`v'`s'_t_minus1
		qui gen `v'`s'_2minus=`v'`s'_t_minus2
		qui gen `v'`s'_3minus=`v'`s'_t_minus3
		
		* Take logs*100 of per capita (pc) and constant prices (cp) variables
		if "`s'"=="_pc"|"`s'"=="_cp" {
			
			dis as text "`v'`s': take logs"
			qui replace `v'_avg`s'=ln(`v'_avg`s')*100
			qui replace `v'_3y_avg`s'=ln(`v'_3y_avg`s')*100
			qui replace `v'_lag_avg`s'=ln(`v'_lag_avg`s')*100
			qui replace `v'`s'_1st=ln(`v'`s'_t_plus1)*100
			qui replace `v'`s'_2nd=ln(`v'`s'_t_plus2)*100
			qui replace `v'`s'_3rd=ln(`v'`s'_t_plus3)*100
			qui replace `v'`s'_4th=ln(`v'`s'_t_plus4)*100
			
			}
			
		* Create average outcomes residualized on year and municipality fixed-effects
		qui areg `v'_avg`s' yy*, absorb(mun_code)
		qui predict res_`v'_avg`s', res
		
		qui areg `v'_3y_avg`s' yy*, absorb(mun_code)
		qui predict res_`v'_3y_avg`s', res
		
		qui areg `v'_lag_avg`s' yy*, absorb(mun_code)
		qui predict res_`v'_lag_avg`s', res

		* Create yearly outcomes residualized on year and municipality fixed-effects
		qui areg `v'`s'_1st yy*, absorb(mun_code)
		qui predict res_`v'_1st`s', res
		
		qui areg `v'`s'_2nd yy*, absorb(mun_code)
		qui predict res_`v'_2nd`s', res
		
		qui areg `v'`s'_3rd yy*, absorb(mun_code)
		qui predict res_`v'_3rd`s', res

		qui areg `v'`s'_4th yy*, absorb(mun_code)
		qui predict res_`v'_4th`s', res
		
		qui areg `v'`s'_0 yy*, absorb(mun_code)
		qui predict res_`v'_0`s', res
		
		qui areg `v'`s'_1minus yy*, absorb(mun_code)
		qui predict res_`v'_1minus`s', res
		
		qui areg `v'`s'_2minus yy*, absorb(mun_code)
		qui predict res_`v'_2minus`s', res
		
		qui areg `v'`s'_3minus yy*, absorb(mun_code)
		qui predict res_`v'_3minus`s', res

		}
}

* Compute differenced outcomes (differences between the 4th year of the term and the election year)
foreach v of local vars {
	
	foreach s of local measure {

		*logged changes for variables per capita or in constant prices
		if "`s'"=="_pc"|"`s'"=="_cp" {	
			dis as text "`v'`s': take log differences"
			qui gen `v'_change`s' = (ln(`v'`s'_t_plus4)-ln(`v'`s'_t_0))*100
			}
		* simple change for variables as a share of gdp or as a share of the budget
		else {
			dis as text "`v'`s': take simple differences"
			qui gen `v'_change`s' = `v'`s'_t_plus4-`v'`s'_t_0
		}	

	}
}


***********************************************
** Create "oil windfall" subsample indicator **
***********************************************

qui gen oil_receiver = .

qui replace oil_receiver = 0 if (oil_royalties_cp_t_plus1  == 0  & oil_royalties_cp_t_plus2  == 0  & oil_royalties_cp_t_plus3 == 0  & oil_royalties_cp_t_plus4 == 0  & ///
								 oil_royalties_cp_t_minus3 == 0  & oil_royalties_cp_t_minus2 == 0 & oil_royalties_cp_t_minus1 == 0 & oil_royalties_cp_t_0      == 0) & ///
								(baseline_sample) == 1

qui replace oil_receiver = 1 if (oil_royalties_cp_t_plus1 != 0  | oil_royalties_cp_t_plus2 != 0  | oil_royalties_cp_t_plus3 != 0  | oil_royalties_cp_t_plus4 != 0 |     ///
								 oil_royalties_cp_t_minus3 != 0 | oil_royalties_cp_t_minus2 != 0 | oil_royalties_cp_t_minus1 != 0 | oil_royalties_cp_t_0 != 0      ) &  ///
								(baseline_sample) == 1

tab oil_receiver if baseline_sample==1, missing	

* calculate change in oil royalties (measured as a share of previous-period revenues)
qui egen oil_royalties_avg 		= rowmean(oil_royalties_cp_t_plus1 oil_royalties_cp_t_plus2 oil_royalties_cp_t_plus3 oil_royalties_cp_t_plus4)
qui egen oil_royalties_avg_lag  = rowmean(oil_royalties_cp_t_minus3 oil_royalties_cp_t_minus2 oil_royalties_cp_t_minus1 oil_royalties_cp_t_0)

qui egen denominator			= rowmean(tot_rev_cp_t_minus3 tot_rev_cp_t_minus2 tot_rev_cp_t_minus1 tot_rev_cp_t_0)
qui gen oil_royalties_change	= ((oil_royalties_avg - oil_royalties_avg_lag) / denominator)*100
qui drop denominator 

* identify oil sample
qui gen oil_sample=.
qui sum oil_royalties_change if baseline_sample ==1 & oil_receiver==1, detail
qui replace oil_sample=0 if ( (oil_receiver==0) | (oil_receiver==1 & oil_royalties_change <=`r(p50)' & oil_royalties_change!=.) ) & baseline_sample==1
qui replace oil_sample=1 if oil_royalties_change >`r(p50)' & oil_royalties_change!=.  & oil_receiver == 1 & baseline_sample==1


*****************************************************************************************
** Create "Tiebout competition" subsamples indicators (as in Ferreira & Gyourko, 2009) **
*****************************************************************************************
* Note: for 2004 and 2008 elections use censo 2000; for 2012 election use censo 2010 - all pre-determined

gen H_index = .
replace H_index = H_tiebout_micro_2000 if year<=2010 
replace H_index = H_tiebout_micro_2010 if year>2010 & year!=.

qui gen tiebout_median_sample = .
qui gen tiebout_75th_sample   = .

sum H_index if baseline_sample == 1, detail

replace tiebout_median_sample = 0 	if H_index <= `r(p50)' & H_index!=. & baseline_sample==1
replace tiebout_75th_sample   = 0	if H_index <= `r(p75)' & H_index!=. & baseline_sample==1

replace tiebout_median_sample = 1 	if H_index > `r(p50)' & H_index!=. & baseline_sample==1
replace tiebout_75th_sample   = 1 	if H_index > `r(p75)' & H_index!=. & baseline_sample==1


drop H_tiebout_micro_2000 H_tiebout_micro_2010


***********************************************
** Create Coalition Distance subsamples indicators **
***********************************************

qui gen coal_dist_median_sample = .
qui gen coal_dist_75th_sample   = .

sum coalition_ideo_dist_lagw if baseline_sample == 1, detail

replace coal_dist_median_sample = 0 	if coalition_ideo_dist_lagw <= `r(p50)' & coalition_ideo_dist_lagw != . & baseline_sample==1
replace coal_dist_75th_sample   = 0 	if coalition_ideo_dist_lagw <= `r(p75)' & coalition_ideo_dist_lagw != . & baseline_sample==1

replace coal_dist_median_sample = 1 	if coalition_ideo_dist_lagw > `r(p50)' & coalition_ideo_dist_lagw != . & baseline_sample==1
replace coal_dist_75th_sample   = 1 	if coalition_ideo_dist_lagw > `r(p75)' & coalition_ideo_dist_lagw != . & baseline_sample==1

tab coal_dist_median_sample, missing
tab coal_dist_75th_sample, missing

******************************************************
** Compute term-averages of socio-economic outcomes **
******************************************************

* (a) Education indicators
local educ_vars "ratio_StuTea_ps avgschool_size_ps avgclass_size_ps teachers_100K_ps schools_100K_ps childc_estab_100K presch_estab_100K test_scores_ps prog_rate_ps ideb_ps"

gen insample_educ = 1 if baseline_sample == 1

foreach v of local educ_vars{
	
	gen insample_`v'=1 if baseline_sample==1
	
	replace insample_`v'=0 if `v'_t_plus3==. & baseline_sample == 1
	replace insample_educ=0 if `v'_t_plus3==. & baseline_sample == 1
		
	* Take logs*100 (so coefficients can be interpreted as percentage differences) 
	dis as text "`v'`s': take logs"
	
	qui gen `v'_3rd = ln(`v'_t_plus3)*100
	
	* gen yearly outcomes residualized on year and municipality fixed-effects

	qui areg `v'_3rd yy*, absorb(mun_code)
	qui predict res_`v'_3rd, res
	
	* Check insample indicators
	tab insample_`v'
}

tab insample_educ

* (b) Violence indicators
local violence_vars "homicide_rate"

foreach v of local violence_vars {
	
	gen insample_`v'=1 if baseline_sample==1
	
	* term-average
	gen `v'_avg = (`v'_t_plus1 + `v'_t_plus2 + `v'_t_plus3 + `v'_t_plus4)/4
	
	replace insample_`v'=0 if (`v'_t_plus1==.|`v'_t_plus2==.|`v'_t_plus3==.|`v'_t_plus4==.) & baseline_sample == 1
		
	* Take logs*100 (so coefficients can be interpreted as percentage differences) 
	dis as text "`v'`s': take logs"
	qui replace `v'_avg 	= ln(`v'_avg)*100
			
	* gen average outcomes residualized on year and municipality fixed-effects
	qui areg `v'_avg yy*, absorb(mun_code)
	qui predict res_`v'_avg, res
			
	* Check insample indicators
	tab insample_`v'

}

* (c) Health indicators

local health_vars "clinic_basic_100K clinic_total_100K docs_100K esf_100K infant_mort_rate"

gen insample_health = 1 if baseline_sample == 1

foreach v of local health_vars {

	gen insample_`v'=1 if baseline_sample==1
	
	* ESF data available only 2007 onwards
	if "`v'"=="esf_100K"{
				
		* 4th year in office
		gen `v'_4th = `v'_t_plus4 
		replace insample_`v'=0 if (`v'_t_plus4==.) & baseline_sample == 1
		replace insample_health=0 if (`v'_t_plus4==.) & baseline_sample == 1
		
		* Take logs*100 (so coefficients can be interpreted as percentage differences) 
		qui replace `v'_4th  = ln(`v'_4th)*100
		
		* gen average outcomes residualized on year and municipality fixed-effects
		qui areg `v'_4th yy*, absorb(mun_code)
		qui predict res_`v'_4th, res
		
	} 
	else{
	
		* term-average
		gen `v'_avg = (`v'_t_plus1 + `v'_t_plus2 + `v'_t_plus3 + `v'_t_plus4)/4 /* take average over mayor term */
		replace insample_`v'=0 if (`v'_t_plus1==.| `v'_t_plus2==.|`v'_t_plus3==.|`v'_t_plus4==.) & baseline_sample == 1
		replace insample_health=0 if (`v'_t_plus1==.| `v'_t_plus2==.|`v'_t_plus3==.|`v'_t_plus4==.) & baseline_sample == 1
		
		* Take logs*100 (so coefficients can be interpreted as percentage differences) 
		dis as text "`v'`s': take logs"
		qui replace `v'_avg  = ln(`v'_avg)*100
		
		* gen average outcomes residualized on year and municipality fixed-effects
		qui areg `v'_avg yy*, absorb(mun_code)
		qui predict res_`v'_avg, res
		
	}
	
	* Check insample indicators
	tab insample_`v'
}

tab insample_health

**********************************************************
* Save dataset file to be used in the empirical analysis *
**********************************************************

save "${root}/data/processed/final_sample.dta", replace
*log close
